import os
import sys
import csv

# When True, the CSV path is read from the first command-line argument;
# when False, the placeholder path assigned below is used instead.
USE_COMMAND_LINE_ARG = False
# Occurrence count at or above which a repeated value is reported.
THRESHOLD_NUMBER = 800

if not USE_COMMAND_LINE_ARG:
    filepath = '[path_to_file_here]'
else:
    # File to scan for automated messages.
    filepath = sys.argv[1]
    # File name without directory, cut at the first "." in the name.
    input_fname = os.path.basename(filepath).partition(".")[0]
    # NOTE(review): this is the full path with the stem appended (not
    # directory + stem), and it is only defined on this branch -- confirm
    # the intent before relying on it.
    output_prefix = filepath + input_fname

# Count how many times each distinct value of the 'text' (or, failing that,
# 'body') column occurs in the CSV file. The result is `text_dict`, mapping
# each cell value to its number of occurrences.
#
# newline='' is what the csv module requires so that newlines embedded in
# quoted fields are parsed correctly.
with open(filepath, 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    text_dict = {}

    # The first row is the header; an empty file simply yields no counts.
    header = next(reader, None)
    if header is not None:
        if 'text' in header:
            column_number = header.index('text')
        elif 'body' in header:
            column_number = header.index('body')
        else:
            # Without a usable column there is nothing to count; stop here
            # instead of failing later with a NameError on column_number.
            sys.exit("Please have text or body as one of the labels in the .csv file")

        for row in reader:
            # Blank lines come back as [] and ragged rows may be too short
            # to contain the target column; skip them rather than crash.
            if len(row) <= column_number:
                continue
            text = row[column_number]
            text_dict[text] = text_dict.get(text, 0) + 1

# Collect every value whose occurrence count reaches THRESHOLD_NUMBER,
# printing each (value, count) pair as it is found.
remove_dict = {}

for message, count in text_dict.items():
    if count < THRESHOLD_NUMBER:
        continue
    print((message, count))
    remove_dict[message] = count
